# Replication materials for Clause Analysis (Van Atteveldt et al., Political Analysis)
#
# Code to replicate the gold-standard validation presented in Table 2
# This code requires the gold_tokens.rds and gold_coding.rds files published in the Dataverse repository

# Install the version of rsyntax used for the manuscript with:
# devtools::install_github("anon-author/rsyntax")
# Note: this library version is for replication only! 

library(rsyntax)
source("functions.r")

# Read the parsed sentences (tokens) and the manual "gold" coding from disk
tokens <- readRDS("gold_tokens.rds")
manual <- readRDS("gold_coding.rds")

# Restrict the tokens to sentences that occur in the manual coding, then
# derive quotes and clauses from the remaining tokens
tokens <- subset(tokens, sentence %in% manual$sentence)
quotes <- get_quotes(tokens)
clauses <- get_clauses(tokens, quotes = quotes)

# Extract source/subject/object triples for aggressions
ai_triples <- get.aggressions(tokens, quotes, clauses)

# Remove triples with an empty object when another triple with the same
# sentence and subject has both subject and object filled in
has_both <- ai_triples$subject != "" & ai_triples$object != ""
subject_key <- paste(ai_triples$sentence, ai_triples$subject)
lacks_object <- subject_key %in% subject_key[has_both] & ai_triples$object == ""
ai_triples <- ai_triples[!lacks_object, ]

# Mirror image: remove triples with an empty subject when another triple with
# the same sentence and object is complete (masks are recomputed because the
# rows of ai_triples changed in the previous step)
has_both <- ai_triples$subject != "" & ai_triples$object != ""
object_key <- paste(ai_triples$sentence, ai_triples$object)
lacks_subject <- object_key %in% object_key[has_both] & ai_triples$subject == ""
ai_triples <- ai_triples[!lacks_subject, ]

# Gaza indicates Hamas in the object position:
# collect the clause ids whose predicate contains a "Gaza" or "Strip" token ...
gaza_token_ids <- tokens$id[tokens$lemma %in% c("Gaza", "Strip")]
in_gaza_predicate <- clauses$clause_role == "predicate" &
  clauses$id %in% gaza_token_ids
gazapred <- clauses$clause_id[in_gaza_predicate]
# ... and fill in "hamas" as object for object-less "israel" triples in those clauses
needs_hamas <- ai_triples$subject == "israel" &
  ai_triples$object == "" &
  ai_triples$clause_id %in% gazapred
ai_triples$object[needs_hamas] <- "hamas"

# Score the extracted triples and sources against the manual coding
compare(manual, merge_incomplete(ai_triples))
compare.sources(manual, ai_triples)

# Score the baseline against the same manual coding
compare(manual, get.baseline(tokens))
compare.sources(manual, get.baseline.sources(tokens))